/*
	Estimates college graduate quality under alternative modeling assumptions.
	Output:	Table 9 (separately for each ALTERNATIVE)
			Table 10 (vertically)
			Table B2
			Figure B6a
			Figure B6b
			Estimates_z_c_ALTERNATIVE.dta
			Estimates_q_j_ALTERNATIVE.dta
*/
 
local seed "C:\Users\jsock\Dropbox\Research\GD\International"

* Project sub-folders, all built from the root folder in `seed'
local dataPath "`seed'/Data"
local inputPath "`seed'/InputData"
local figurePath "`seed'/Replication/Figures"
local tablePath "`seed'/Replication/Tables"
local estimatePath "`seed'/Replication/Estimates"
local tempPath "`seed'/Replication/TempData"

********************************************************
* Build a country-level lookup file: log of average gdppw over 2010-2021
* Saved to TempData/Country_gdppw.dta and merged in further below
********************************************************

preserve

	clear

	insheet using "`inputPath'/Exchange_rates_2022.csv", comma

	* Restrict to 2010-2021 (rows with a missing year are excluded as well)
	keep if inrange(year, 2010, 2021)

	* Average within country first, then take logs
	bysort country_glassdoor: egen avg_gdppw = mean(gdppw)
	generate log_gdppw_uc = ln(avg_gdppw)

	* Collapse to one row per country
	bysort country_glassdoor: keep if _n == 1

	keep country_glassdoor iso log_gdppw_uc
	drop if log_gdppw_uc == .
	sort country_glassdoor

	save "`tempPath'/Country_gdppw.dta", replace

restore

********************************************************
* Load the main salary dataset
********************************************************

clear
set more off
set matsize 10000
set scheme s1mono

* Read the raw CSV from the data folder
insheet using "`dataPath'/Salaries_international_dataset_main.csv", comma

* Observations without a job title are removed
drop if jobtitle == ""

* Columns removed immediately after loading
drop v1 shortname city basecurrency country_iso sectorname iscurrentjobflag

*--------------------------------
* Thresholds used for sample selection
*	Baseline thresholds are 25; the _t50 variants use 50.
*	(A stray "." that stood alone on a line here was removed: it is not a command.)
*--------------------------------

scalar country_premia_thresh = 25
scalar selection_thresh = 25
scalar school_thresh = 25

scalar country_premia_thresh_t50 = 50
scalar selection_thresh_t50 = 50
scalar school_thresh_t50 = 50

*--------------------------------
* Drop users with more than 10 salary reports
*--------------------------------

* Order each user's reports by year of salary, then dateval, then salid
bysort fk_userid (yearofsalary dateval salid): generate obsNum = _n
by fk_userid: generate userReviews = _N

drop if userReviews > 10

*--------------------------------
* Derived experience and pay variables
*--------------------------------

* Experience and its square
generate exp = yearsofrelevantexpnumber
drop yearsofrelevantexpnumber
generate exp_sqrd = exp ^ 2

* Total pay = base pay plus all variable components
* (missing if any single component is missing)
generate fullsalary = basesalary + cashbonusyearly + stockbonusyearly ///
	+ profitsharingyearly + salescommissionyearly + tipsyearly

* Log pay after scaling by ppp_xrat; total pay is left missing when dropnotannual == 1
generate logtotal = ln(fullsalary * ppp_xrat) if dropnotannual != 1
generate logbase = ln(basesalary * ppp_xrat)

generate log_gdppw = ln(gdppw)

drop if ppp_xrat == .

*--------------------------------
* Flag outliers in base pay
*	Flagged when scaled base pay is below gdppw / 10 or above 10 * gdppw
*--------------------------------

scalar scalingThresh = 10

generate realbase = basesalary * ppp_xrat
generate outside_thresh = (realbase > scalingThresh * gdppw) | (realbase < (1/scalingThresh) * gdppw)
drop realbase

*------------------------------------------
* Ranking bins based on world_rank
*------------------------------------------

generate rankBin = ""

* Parallel lists: bin label, lower bound, upper bound
local bin_labels "A_01_20 B_21_50 C_51_100 D_101_250 E_251_500 F_501_1000 G_1001_2000"
local bin_lows "1 21 51 101 251 501 1001"
local bin_highs "20 50 100 250 500 1000 2000"

forvalues b = 1/7 {
	local bin_label : word `b' of `bin_labels'
	local bin_low : word `b' of `bin_lows'
	local bin_high : word `b' of `bin_highs'
	replace rankBin = "`bin_label'" if inrange(world_rank, `bin_low', `bin_high')
}

* No world rank but a known university country
replace rankBin = "H_UNRANKED" if world_rank == . & universitycountry != ""

generate national_rank_pct = national_rank / numberuniversities

*------------------------------------------
* Major of study: Stem vs NonStem
*------------------------------------------

generate majorStem = ""
replace majorStem = "Stem" if inlist(grpmajor, "Biological Sciences", "Engineering", "Physical Sciences", "Technology")
replace majorStem = "NonStem" if inlist(grpmajor, "Business", "Social Sciences", "Arts and Humanities", "Communication", "Education", "Health Service", "Social Service")

*--------------------------------
* Drop Singapore Polytechnics which aren't considered by WHED
*--------------------------------

drop if inlist(school, "Nanyang Polytechnic", "Ngee Ann Polytechnic", "Republic Polytechnic", "Singapore Polytechnic", "Temasek Polytechnic")

*--------------------------------
* Indicators: wage earned in the university country or elsewhere
*	All three are 0 when the relevant university country is blank
*--------------------------------

generate home_country = universitycountry != "" & countryname == universitycountry

generate foreign_country = universitycountry != "" & countryname != universitycountry

generate foreign_country_2 = universitycountry_2 != "" & countryname != universitycountry_2

*--------------------------------
* Generate indicators for valid in education analysis
*--------------------------------

* For first degree

* Degree is recorded and is above high school
generate hasDegree = degree != "UNMATCHED" & degree != "missing" & degree != ""  & degree != "HIGHSCHOOL"

* First degree is a bachelor's
generate uniDegree = degree == "BACHELORS" 

generate hasSchool = school != ""

* For second degree

* FIX: the last condition previously tested degree (the first degree) instead of
* degree_2, so a second degree of "HIGHSCHOOL" was counted as a valid degree
generate hasDegree_2 = degree_2 != "UNMATCHED" & degree_2 != "missing" & degree_2 != ""  & degree_2 != "HIGHSCHOOL"

* Second degree is not associates / diploma / high school / bachelor's.
* Evaluated before blanks are recoded to "NONE" below, so a blank degree_2 gives 1 here.
generate uniDegree_2 = degree_2 != "ASSOCIATES" & degree_2 != "DIPLOMA"  & degree_2 != "HIGHSCHOOL" & degree_2 != "BACHELORS"

generate hasSchool_2 = school_2 != ""

replace degree_2 = "NONE" if degree_2 == ""

*--------------------------------
* Create college graduate partitions
*--------------------------------

* No second degree recorded
generate onlyUndergrad = degree_2 == "NONE" 

* Second degree is a graduate degree
generate alsoGrad = degree_2 == "JD" | degree_2 == "MASTERS" | degree_2 == "MBA" | degree_2 == "PHD" | degree_2 == "POSTGRAD" 

*--------------------------------
* Add work country name
*	Each merge below attaches iso and log_gdppw_uc from Country_gdppw.dta.
*	keep(master match) stops countries that appear only in the lookup file
*	from being appended as empty observations (previously _merge == 2 rows
*	were kept), consistent with the later merges that drop _merge == 2.
*--------------------------------

generate country_glassdoor = countryname

merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta", keep(master match) nogenerate

rename iso work_country_iso
rename log_gdppw_uc log_gdppw_work_country 

drop country_glassdoor 

*--------------------------------
* Add country of study name
*--------------------------------

generate country_glassdoor = universitycountry

merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta", keep(master match) nogenerate

rename iso university_country_iso
rename log_gdppw_uc log_gdppw_university_country 

drop country_glassdoor 

*--------------------------------
* Add second university country name
*	Only the iso code is kept for the second university country
*--------------------------------

generate country_glassdoor = universitycountry_2

merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta", keep(master match) nogenerate

rename iso university_country_iso_2

drop country_glassdoor log_gdppw_uc

*--------------------------------
* Keep baseline sample
*	Excludes the self-employed and base-pay outliers, then keeps observations
*	with a bachelor's first degree, a school, and a university country
*--------------------------------

drop if employertypecode == "SELF_EMPLOYED" | outside_thresh

generate valid_educ = uniDegree & hasDegree & hasSchool & universitycountry != ""
drop if !valid_educ

* Share of the retained sample that also holds a graduate degree
summarize alsoGrad, detail
scalar share_with_grad = r(mean)

********************************************************
* SAMPLE SIZE FOR COUNTRY PREMIA Z_C (MOVERS)
********************************************************

*--------------------------------
* Determine top destinations
*--------------------------------

sort fk_userid yearofsalary dateval salid

* destination = work country of the user's next observation ("" for the last one)
by fk_userid: generate destination = countryname[_n+1]

* migrant = 1 when the next observation is in a different country
generate migrant = destination != countryname & destination != ""

* Count migrants per destination and keep that count on one row per destination only
sort destination 
by destination : egen destinationMigrants = sum(migrant)
by destination : replace destinationMigrants = . if destination == ""
by destination : replace destinationMigrants = . if _n > 1

* Negate so that the ascending sort below lists the largest destinations first
replace destinationMigrants = destinationMigrants * -1

sort destinationMigrants 

* Determine top destinations (at least 1.0% of all migrants)

* Undo the negation when totalling; one row per destination is non-missing
egen totalMigrants = sum(destinationMigrants * -1)
	
generate migrant_share = -1 * destinationMigrants / totalMigrants
	
generate top_destination = migrant_share >= 0.01 & migrant_share != .

* Spread the flag from the single tagged row to every row with that destination
bys destination : egen topDestination = max(top_destination)

drop destinationMigrants top_destination

*--------------------------------
* Determine # of migrants to top destinations for each origin
*--------------------------------

* All outgoing migrants per origin (work) country
bys countryname : egen migrantsOverall = sum(migrant)  

* Outgoing migrants whose destination is a top destination
bys countryname : egen migrantsTop = sum(migrant * topDestination)  

* Origin countries with at least country_premia_thresh migrants to top destinations
generate valid_country_premia = migrantsTop >= country_premia_thresh & migrantsTop != .

********************************************************
* ESTIMATE SKILL LOSS
********************************************************

*--------------------------------
* Previous work country and migration flags (within user, in time order)
*--------------------------------

bysort fk_userid (yearofsalary dateval salid): generate prev_country = countryname[_n-1]

* A move is a change of work country relative to the user's previous observation
generate migrated = prev_country != "" & countryname != prev_country

* Running count of moves made so far by the user
bysort fk_userid (yearofsalary dateval salid): generate already_migrated = sum(migrated)

*--------------------------------
* Absolute change in log gdppw between work countries, carried forward after a move
*--------------------------------

bysort fk_userid (yearofsalary dateval salid): generate prev_gdp = log_gdppw_work_country[_n-1]

generate delta_log_gdppw = abs(log_gdppw_work_country - prev_gdp)

* Zero by default; set to the gdppw gap on the observation where the move happens
generate migrant_delta_gdppw_work = 0
by fk_userid: replace migrant_delta_gdppw_work = delta_log_gdppw if migrated == 1

* Carry the value forward to later observations with no further move, looking back
* up to 8 observations. At lag k the condition is: no move on the current row nor on
* rows _n-1 ... _n-(k-1), and a move on row _n-k.
local no_move_since "migrated == 0"
forvalues lag = 1/8 {
	by fk_userid: replace migrant_delta_gdppw_work = migrant_delta_gdppw_work[_n-`lag'] if `no_move_since' & migrated[_n-`lag'] == 1
	local no_move_since "`no_move_since' & migrated[_n-`lag'] == 0"
}

*--------------------------------
* Generate coarse destination FE for migration 
*	Keep top destinations alone, group others by continent
*--------------------------------

	* Top destinations
	
	preserve

		* One row per destination, relabelled so it can be matched on work country
		keep destination topDestination
		rename destination countryname
		rename topDestination is_topDestination
		
		bys countryname : keep if _n == 1
		
		tempfile in_results
		save `in_results', emptyok
		
	restore

	merge m:1 countryname using `in_results'
	drop if _merge == 2
	drop _merge

	* A work country that is itself a top destination keeps its own name
	generate coarseDestination = ""
	replace coarseDestination = countryname if is_topDestination == 1
	drop is_topDestination

	* Continents
	
	preserve

		clear
		
		* Forward slash used for consistency with every other input path in this file
		insheet using "`inputPath'/Country_continents.csv" , comma
		
		rename v2 countryname
		rename v7 continent
		keep countryname continent
		
		bys countryname : keep if _n == 1
		
		* Harmonise country names with those used in the salary data
		replace countryname = "Czech Republic" if countryname == "Czechia"
		replace countryname = "Iran" if countryname == "Iran (Islamic Republic of)"
		replace countryname = "Russia" if countryname == "Russian Federation"
		replace countryname = "Hong Kong" if countryname == "China, Hong Kong Special Administrative Region"
		replace countryname = "South Korea" if countryname == "Republic of Korea"
		replace countryname = "United Kingdom" if countryname == "United Kingdom of Great Britain and Northern Ireland"
		replace countryname = "United States" if countryname == "United States of America"
		replace countryname = "Vietnam" if countryname == "Viet Nam"
		
		tempfile in_results
		save `in_results', emptyok
		
	restore

	merge m:1 countryname using `in_results'
	drop if _merge == 2
	drop _merge

	* Remaining countries in the country-premia sample are grouped by continent
	replace coarseDestination = continent if coarseDestination == "" & valid_country_premia
	
	* Destination FE
	
	* One dummy per coarse destination (c_1, c_2, ...), defined on the country-premia sample
	tab coarseDestination if valid_country_premia == 1, gen(c_)
	* Switch the dummies off for observations that are neither a move nor after a move
	foreach my_var of varlist c_*{
		replace `my_var' = 0 if ~(migrated | already_migrated >= 1)
	}	
	
********************************************************
* STEP 0: SELECTION INTO GLASSDOOR
*	Hard-coded selection terms by country, applied only to observations whose
*	work country equals their university country
********************************************************

generate select = .
generate select_top = .
generate select_not = .

local ctry "India"
replace select     = -0.08 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = -0.15 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = -0.04 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "Nigeria"
replace select     = 0.43 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = 0.41 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = 0.45 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "Philippines"
replace select     = -0.05 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = 0.00 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = -0.06 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "China"
replace select     = 0.10 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = 0.15 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = 0.02 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "Colombia"
replace select     = 0.22 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = 0.41 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = 0.14 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "South Africa"
replace select     = 0.19 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = 0.00 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = 0.27 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "Poland"
replace select     = 0.31 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = 0.36 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = 0.22 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "South Korea"
replace select     = 0.05 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = 0.03 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = 0.09 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "Japan"
replace select     = 0.02 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = 0.04 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = -0.06 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "New Zealand"
replace select     = -0.03 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = 0.00 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = -0.04 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "United Kingdom"
replace select     = 0.03 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = -0.09 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = 0.04 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "Australia"
replace select     = 0.00 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = 0.03 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = -0.01 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "Netherlands"
replace select     = -0.09 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = -0.04 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = -0.11 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "Italy"
replace select     = 0.35 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = 0.27 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = 0.41 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "United States"
replace select     = 0.03 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = 0.02 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = 0.04 if countryname == "`ctry'" & universitycountry == "`ctry'"

* Singapore has no select_top value (left missing here, set to 0 below)
local ctry "Singapore"
replace select     = -0.10 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = . if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = -0.10 if countryname == "`ctry'" & universitycountry == "`ctry'"

local ctry "Ireland"
replace select     = -0.02 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_top = -0.03 if countryname == "`ctry'" & universitycountry == "`ctry'"
replace select_not = -0.01 if countryname == "`ctry'" & universitycountry == "`ctry'"

* Fill in missing values and create selection indicator

* 1 for every observation in a work country that has at least one selection value
bysort countryname: egen selectionCountry = max(select != .)

* Missing select / select_top become 0; select_not is left as is
replace select_top = 0 if select_top == .
replace select = 0 if select == .

********************************************************
* ESTIMATE COUNTRY FE : Z_C
********************************************************

* Tag one observation per work country (used for country-level correlations)
bysort countryname: generate new_country = _n == 1

*--------------------------------
* Baseline specification
*--------------------------------

local ending "baseline"

* User, work-country and year effects absorbed; country effects saved in fe_country
reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work if valid_country_premia, ///
	absorb(fk_userid fe_country=countryname yearofsalary) vce(cluster countryname)

* Estimation-sample size per country
bysort countryname: egen obs_`ending' = total(e(sample))

* Spread each country's effect to all of its observations
bysort countryname: egen z_c_`ending' = max(fe_country)

* tau = destination-dummy terms plus the gdppw-gap term, at the estimated coefficients
generate tau_`ending' = 0
foreach dest_fe of varlist c_* {

	* 1 for every observation whose coarse destination has this dummy switched on somewhere
	bysort coarseDestination: egen fill_`dest_fe' = max(`dest_fe')
	replace tau_`ending' = tau_`ending' + `dest_fe' * _b["`dest_fe'"]
	* Dummy is 0 but the person works outside the university country
	replace tau_`ending' = tau_`ending' + fill_`dest_fe' * _b["`dest_fe'"] if `dest_fe' == 0 & universitycountry != countryname
	drop fill_*

}

* gdppw-gap term; when it is 0 and the person works outside the university country,
* the gap between work and university country is used instead
replace tau_`ending' = tau_`ending' + migrant_delta_gdppw_work * _b["migrant_delta_gdppw_work"]
replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["migrant_delta_gdppw_work"] if migrant_delta_gdppw_work == 0 & universitycountry != countryname

* Log base pay net of the country effect and tau
generate logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation of z with baseline
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation of z with gdppw
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

********************************************************
* Additional Setup for robustness
********************************************************

*--------------------------------
* Leads and lags within user (time order: yearofsalary dateval salid)
*--------------------------------

* Values taken from the user's next observation
bysort fk_userid (yearofsalary dateval salid): generate next_logbase = logbase[_n+1]
by fk_userid: generate next_country = countryname[_n+1]
by fk_userid: generate next_year = yearofsalary[_n+1]
by fk_userid: generate next_gdppw = log_gdppw_work_country[_n+1]

* Year of the user's previous observation
by fk_userid: generate prev_year = yearofsalary[_n-1]

* 1 when the user's next observation is flagged as a move
by fk_userid: generate will_migrate = migrated[_n+1] == 1

* Changes between the current and the next observation
generate delta_logbase = next_logbase - logbase
generate delta_years = next_year - yearofsalary
generate delta_gdppw = next_gdppw - log_gdppw_work_country

generate abs_delta_gdppw = abs(delta_gdppw)

*--------------------------------
* Consider only migrations where more time has elapsed
*	Moves with fewer than 4 years between observations are left out
*--------------------------------

local ending "longTime"

* Years since the previous observation, defined only on observations flagged as a move
generate migrate_years = yearofsalary - prev_year if migrated

* Per origin country: moves to top destinations with at least 4 years between observations
bysort countryname: egen migrantsTopLong = total(topDestination * (will_migrate & delta_years >= 4 & delta_years != .))

generate valid_country_premia_long = migrantsTopLong >= country_premia_thresh & migrantsTopLong != .

* Keeps non-move observations (migrate_years missing) and moves of 4+ years
reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work ///
	if valid_country_premia_long & (migrate_years >= 4 | migrate_years == .), ///
	absorb(fk_userid fe_country=countryname yearofsalary) vce(cluster countryname)

* Estimation-sample size per country
bysort countryname: egen obs_`ending' = total(e(sample))

* Spread each country's effect to all of its observations
bysort countryname: egen z_c_`ending' = max(fe_country)

* tau = destination-dummy terms plus the gdppw-gap term, at the estimated coefficients
generate tau_`ending' = 0
foreach dest_fe of varlist c_* {

	bysort coarseDestination: egen fill_`dest_fe' = max(`dest_fe')
	replace tau_`ending' = tau_`ending' + `dest_fe' * _b["`dest_fe'"]
	* Dummy is 0 but the person works outside the university country
	replace tau_`ending' = tau_`ending' + fill_`dest_fe' * _b["`dest_fe'"] if `dest_fe' == 0 & universitycountry != countryname
	drop fill_*

}

replace tau_`ending' = tau_`ending' + migrant_delta_gdppw_work * _b["migrant_delta_gdppw_work"]
replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["migrant_delta_gdppw_work"] if migrant_delta_gdppw_work == 0 & universitycountry != countryname

* Log base pay net of the country effect and tau
generate logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation of z with baseline
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation of z with gdppw
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

drop migrantsTopLong

*--------------------------------
* Excluding Founders and C-suite
*--------------------------------

local ending "excFounderCSuite"

* Lower-cased job title used for all keyword searches below.
* NOTE(review): lower() only folds ASCII letters, so the non-ASCII keywords match
* only titles that already carry those characters in lower case.
generate lower_job = lower(jobtitle)

* Founder job title
generate founder = 0
replace founder = 1 if strpos(lower_job,"founder") 
replace founder = 1 if strpos(lower_job,"gründer") 
replace founder = 1 if strpos(lower_job,"fundador") 
replace founder = 1 if strpos(lower_job,"fondatore") 
replace founder = 1 if strpos(lower_job,"fondatrice") 
replace founder = 1 if strpos(lower_job,"fondateur") 
replace founder = 1 if strpos(lower_job,"założyciel") 
replace founder = 1 if strpos(lower_job,"kurucu") 
replace founder = 1 if strpos(lower_job,"创办人") 
replace founder = 1 if strpos(lower_job,"основатель") 
replace founder = 1 if strpos(lower_job,"pendiri") 
replace founder = 1 if strpos(lower_job,"fondator") 
replace founder = 1 if strpos(lower_job,"oprichter") 
replace founder = 1 if strpos(lower_job,"zakladatel") 
replace founder = 1 if strpos(lower_job,"grundare") 
replace founder = 1 if strpos(lower_job,"alapító") 

* Csuite job title
generate csuite = 0
replace csuite = 1 if strpos(lower_job,"chief") & strpos(lower_job,"officer") 
* FIX: the acronyms were previously searched in upper case ("CEO", "CFO", ...) inside
* the lower-cased title, so they could never match. They are now matched in lower
* case and as whole words only, because a plain substring search would also hit
* unrelated titles (e.g. "cto" inside "director", "coo" inside "coordinator").
* This changes which observations are excluded from the regression below.
replace csuite = 1 if ustrregexm(lower_job, "\b(ceo|cfo|cio|coo|cmo|cto)\b")
replace csuite = 1 if strpos(lower_job,"geschäftsführer") 
replace csuite = 1 if strpos(lower_job,"director ejecutivo") 
replace csuite = 1 if strpos(lower_job,"amministratore delegato") 
replace csuite = 1 if strpos(lower_job,"diretor executivo") 
replace csuite = 1 if strpos(lower_job,"dyrektor generalny") 
replace csuite = 1 if strpos(lower_job,"director executiv") 
replace csuite = 1 if strpos(lower_job,"directeur") 
replace csuite = 1 if strpos(lower_job,"výkonný ředitel") 
replace csuite = 1 if strpos(lower_job,"verkställande direktör") 

* Baseline specification on the sample without founder / C-suite job titles
reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work if valid_country_premia & ~founder & ~csuite, absorb(fk_userid fe_country=countryname yearofsalary) vce(cluster countryname)

* Estimation-sample size per country
bys countryname : egen obs_`ending' =sum(e(sample))

* Spread each country's effect to all of its observations
bys countryname : egen z_c_`ending' = max(fe_country)

* tau = destination-dummy terms plus the gdppw-gap term, at the estimated coefficients
generate tau_`ending' = 0
foreach my_var of varlist c_* {
	
	bys coarseDestination : egen fill_`my_var' = max(`my_var')
	replace tau_`ending' = tau_`ending' + `my_var' * _b["`my_var'"] 
	replace tau_`ending' = tau_`ending' + fill_`my_var' * _b["`my_var'"] if `my_var' == 0 & universitycountry != countryname 
	drop fill_*
	
}
foreach my_var of varlist migrant_delta_gdppw_work {

	replace tau_`ending' = tau_`ending' + `my_var' * _b["`my_var'"]
	replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["`my_var'"] if `my_var' == 0 & universitycountry != countryname 

}

* Log base pay net of the country effect and tau
generate logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation of z with baseline
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation of z with gdppw
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* Add observation-number FE
*	Baseline specification with obsNum added to the absorbed effects
*--------------------------------

local ending "obsNumFE"

reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work if valid_country_premia, ///
	absorb(fk_userid fe_country=countryname yearofsalary obsNum) vce(cluster countryname)

* Estimation-sample size per country
bysort countryname: egen obs_`ending' = total(e(sample))

* Spread each country's effect to all of its observations
bysort countryname: egen z_c_`ending' = max(fe_country)

* tau = destination-dummy terms plus the gdppw-gap term, at the estimated coefficients
generate tau_`ending' = 0
foreach dest_fe of varlist c_* {

	bysort coarseDestination: egen fill_`dest_fe' = max(`dest_fe')
	replace tau_`ending' = tau_`ending' + `dest_fe' * _b["`dest_fe'"]
	* Dummy is 0 but the person works outside the university country
	replace tau_`ending' = tau_`ending' + fill_`dest_fe' * _b["`dest_fe'"] if `dest_fe' == 0 & universitycountry != countryname
	drop fill_*

}

replace tau_`ending' = tau_`ending' + migrant_delta_gdppw_work * _b["migrant_delta_gdppw_work"]
replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["migrant_delta_gdppw_work"] if migrant_delta_gdppw_work == 0 & universitycountry != countryname

* Log base pay net of the country effect and tau
generate logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation of z with baseline
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation of z with gdppw
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* Summary statistics: earnings change at migration
*	Each preserve/restore block keeps one subsample of pre-move observations
*	and writes detailed summary statistics with outreg2
*--------------------------------

* Baseline country effect of the user's next observation
bysort fk_userid (yearofsalary dateval salid): generate next_z = z_c_baseline[_n+1]

* Changes in z and in log base pay, each relative to the change in log gdppw
generate delta_z = next_z - z_c_baseline
generate delta_z_ratio = delta_z / delta_gdppw

generate delta_ratio = delta_logbase / delta_gdppw

* (1) All moves with an absolute log-gdppw change above 0.25
preserve

	keep if will_migrate == 1 & abs_delta_gdppw > 0.25
	keep delta_ratio

	outreg2 using "`estimatePath'\Migration_gains_gdppw.xls", replace sum(detail) 

restore

* (2) Moves to the United States with a log-gdppw gain above ln(4)
preserve

	keep if will_migrate == 1 & next_country == "United States" & delta_gdppw > ln(4)
	keep delta_ratio delta_z_ratio

	outreg2 using "`estimatePath'\Migration_gains_gdppw_US_poor.xls", replace sum(detail) 

restore

* (3) Moves from India to the United States
preserve

	keep if will_migrate == 1 & countryname == "India" & next_country == "United States"
	keep delta_ratio

	outreg2 using "`estimatePath'\Migration_gains_gdppw_India_US.xls", replace sum(detail) 

restore

* (4) Same as (2) but excluding moves that start in India
preserve

	keep if will_migrate == 1 & next_country == "United States" & delta_gdppw > ln(4) & countryname != "India"
	keep delta_ratio

	outreg2 using "`estimatePath'\Migration_gains_gdppw_notIndia_US.xls", replace sum(detail) 

restore

*--------------------------------
* Exclude wage-pairs for Indian migrants to the U.S.
*	Drops both sides of an India -> United States move: the U.S. observation
*	that follows an Indian one, and the Indian observation that precedes a U.S. one
*--------------------------------

local ending "excludeIndiaToUS"

* NOTE(review): jobtitle is absorbed here but in none of the other specifications
* in this section - confirm this is intended and not a leftover from another spec.
reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work ///
	if valid_country_premia & !((countryname == "United States" & prev_country == "India") | (countryname == "India" & next_country == "United States")), ///
	absorb(fk_userid jobtitle fe_country=countryname yearofsalary) vce(cluster countryname)

* Estimation-sample size per country
bysort countryname: egen obs_`ending' = total(e(sample))

* Spread each country's effect to all of its observations
bysort countryname: egen z_c_`ending' = max(fe_country)

* tau = destination-dummy terms plus the gdppw-gap term, at the estimated coefficients
generate tau_`ending' = 0
foreach dest_fe of varlist c_* {

	bysort coarseDestination: egen fill_`dest_fe' = max(`dest_fe')
	replace tau_`ending' = tau_`ending' + `dest_fe' * _b["`dest_fe'"]
	* Dummy is 0 but the person works outside the university country
	replace tau_`ending' = tau_`ending' + fill_`dest_fe' * _b["`dest_fe'"] if `dest_fe' == 0 & universitycountry != countryname
	drop fill_*

}

replace tau_`ending' = tau_`ending' + migrant_delta_gdppw_work * _b["migrant_delta_gdppw_work"]
replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["migrant_delta_gdppw_work"] if migrant_delta_gdppw_work == 0 & universitycountry != countryname

* Log base pay net of the country effect and tau
generate logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation of z with baseline
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation of z with gdppw
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* Project onto GDP
*	Replaces the baseline country effect with its fitted value from a
*	country-level regression on log gdppw of the work country
*--------------------------------

local ending "projectGDP"

* One observation per country enters the regression
regress z_c_baseline log_gdppw_work_country if new_country

* Linear prediction on the estimation sample, then spread to all rows of each country
predict z_fitted if e(sample), xb
bysort countryname: egen z_c_`ending' = max(z_fitted)
drop z_fitted

* Uses the baseline tau; only the country effect differs from the baseline
generate logbase_z_c_`ending' = logbase - z_c_`ending' - tau_baseline

* Correlation of z with baseline
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation of z with gdppw
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* Include variable income
*	Same specification as the baseline but with log total pay as the outcome
*--------------------------------

local ending "addVP"

* Outlier flag for total pay, built the same way as the base-pay flag
generate realtotal = fullsalary * ppp_xrat
generate outside_thresh_total = (realtotal > scalingThresh * gdppw) | (realtotal < (1/scalingThresh) * gdppw)
drop realtotal

reghdfe logtotal exp exp_sqrd c_* migrant_delta_gdppw_work ///
	if valid_country_premia & !outside_thresh_total, ///
	absorb(fk_userid fe_country=countryname yearofsalary) vce(cluster countryname)

* Estimation-sample size per country
bysort countryname: egen obs_`ending' = total(e(sample))

* Spread each country's effect to all of its observations
bysort countryname: egen z_c_`ending' = max(fe_country)

* tau = destination-dummy terms plus the gdppw-gap term, at the estimated coefficients
generate tau_`ending' = 0
foreach dest_fe of varlist c_* {

	bysort coarseDestination: egen fill_`dest_fe' = max(`dest_fe')
	replace tau_`ending' = tau_`ending' + `dest_fe' * _b["`dest_fe'"]
	* Dummy is 0 but the person works outside the university country
	replace tau_`ending' = tau_`ending' + fill_`dest_fe' * _b["`dest_fe'"] if `dest_fe' == 0 & universitycountry != countryname
	drop fill_*

}

replace tau_`ending' = tau_`ending' + migrant_delta_gdppw_work * _b["migrant_delta_gdppw_work"]
replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["migrant_delta_gdppw_work"] if migrant_delta_gdppw_work == 0 & universitycountry != countryname

* Log total pay net of the country effect and tau
generate logtotal_z_c_`ending' = logtotal - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation of z with baseline
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation of z with gdppw
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* Exclude U.S.
*	Drops U.S. observations and re-qualifies origin countries using only
*	moves to top destinations other than the United States
*--------------------------------

local ending "excludeUS"

* Per origin country: moves to a top destination that is not the United States
bysort countryname: egen migrantsExcludeUS = total(migrant & topDestination & destination != "United States")

generate valid_country_premia_`ending' = migrantsExcludeUS >= country_premia_thresh & migrantsExcludeUS != .

reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work ///
	if valid_country_premia_`ending' & countryname != "United States", ///
	absorb(fk_userid fe_country=countryname yearofsalary) vce(cluster countryname)

* Estimation-sample size per country
bysort countryname: egen obs_`ending' = total(e(sample))

* Spread each country's effect to all of its observations
bysort countryname: egen z_c_`ending' = max(fe_country)

* tau = destination-dummy terms plus the gdppw-gap term, at the estimated coefficients
generate tau_`ending' = 0
foreach dest_fe of varlist c_* {

	bysort coarseDestination: egen fill_`dest_fe' = max(`dest_fe')
	replace tau_`ending' = tau_`ending' + `dest_fe' * _b["`dest_fe'"]
	* Dummy is 0 but the person works outside the university country
	replace tau_`ending' = tau_`ending' + fill_`dest_fe' * _b["`dest_fe'"] if `dest_fe' == 0 & universitycountry != countryname
	drop fill_*

}

replace tau_`ending' = tau_`ending' + migrant_delta_gdppw_work * _b["migrant_delta_gdppw_work"]
replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["migrant_delta_gdppw_work"] if migrant_delta_gdppw_work == 0 & universitycountry != countryname

* Log base pay net of the country effect and tau
generate logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation of z with baseline
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation of z with gdppw
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* Add firm FE: fk_employerid is absorbed alongside the other effects
*--------------------------------

local ending "firm"

* Country effects are saved in fe_country
reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work ///
	if valid_country_premia, ///
	absorb(fk_userid fk_employerid fe_country=countryname yearofsalary) ///
	vce(cluster countryname)

* Estimation-sample count and country effect, copied to every row of the country
bysort countryname : egen obs_`ending' = sum(e(sample))
bysort countryname : egen z_c_`ending' = max(fe_country)

* tau accumulates regressor times coefficient for the c_ regressors and the GDP gap
gen tau_`ending' = 0
foreach cvar of varlist c_* {

	* Largest value of this regressor within the coarseDestination group
	bysort coarseDestination : egen fill_`cvar' = max(`cvar')

	replace tau_`ending' = tau_`ending' + `cvar' * _b["`cvar'"]

	* Rows with a zero regressor that work outside the university country
	* receive the group maximum instead
	replace tau_`ending' = tau_`ending' + fill_`cvar' * _b["`cvar'"] if `cvar' == 0 & universitycountry != countryname
	drop fill_*

}
foreach gapvar of varlist migrant_delta_gdppw_work {

	replace tau_`ending' = tau_`ending' + `gapvar' * _b["`gapvar'"]

	* Same fill-in, using the absolute log GDP-per-worker gap
	replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["`gapvar'"] if `gapvar' == 0 & universitycountry != countryname

}

* Outcome net of the country effect and tau, used by the school-level step
gen logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation with the baseline country effects (rows flagged new_country)
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation with log GDP per worker of the work country
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* Add metro FE: step-1 inputs for the metro specification
* NOTE(review): absorb() below holds no metro term; metro is absorbed only
* in the later school-level regression. Confirm this is intended.
*--------------------------------

local ending "metro"

* Country effects are saved in fe_country
reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work ///
	if valid_country_premia, ///
	absorb(fk_userid fe_country=countryname yearofsalary) ///
	vce(cluster countryname)

* Estimation-sample count and country effect, copied to every row of the country
bysort countryname : egen obs_`ending' = sum(e(sample))
bysort countryname : egen z_c_`ending' = max(fe_country)

* tau accumulates regressor times coefficient for the c_ regressors and the GDP gap
gen tau_`ending' = 0
foreach cvar of varlist c_* {

	* Largest value of this regressor within the coarseDestination group
	bysort coarseDestination : egen fill_`cvar' = max(`cvar')

	replace tau_`ending' = tau_`ending' + `cvar' * _b["`cvar'"]

	* Rows with a zero regressor that work outside the university country
	* receive the group maximum instead
	replace tau_`ending' = tau_`ending' + fill_`cvar' * _b["`cvar'"] if `cvar' == 0 & universitycountry != countryname
	drop fill_*

}
foreach gapvar of varlist migrant_delta_gdppw_work {

	replace tau_`ending' = tau_`ending' + `gapvar' * _b["`gapvar'"]

	* Same fill-in, using the absolute log GDP-per-worker gap
	replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["`gapvar'"] if `gapvar' == 0 & universitycountry != countryname

}

* Outcome net of the country effect and tau, used by the school-level step
gen logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation with the baseline country effects (rows flagged new_country)
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation with log GDP per worker of the work country
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* Add job title FE: jobtitle is absorbed alongside the other effects
*--------------------------------

local ending "job"

* Country effects are saved in fe_country
reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work ///
	if valid_country_premia, ///
	absorb(fk_userid jobtitle fe_country=countryname yearofsalary) ///
	vce(cluster countryname)

* Estimation-sample count and country effect, copied to every row of the country
bysort countryname : egen obs_`ending' = sum(e(sample))
bysort countryname : egen z_c_`ending' = max(fe_country)

* tau accumulates regressor times coefficient for the c_ regressors and the GDP gap
gen tau_`ending' = 0
foreach cvar of varlist c_* {

	* Largest value of this regressor within the coarseDestination group
	bysort coarseDestination : egen fill_`cvar' = max(`cvar')

	replace tau_`ending' = tau_`ending' + `cvar' * _b["`cvar'"]

	* Rows with a zero regressor that work outside the university country
	* receive the group maximum instead
	replace tau_`ending' = tau_`ending' + fill_`cvar' * _b["`cvar'"] if `cvar' == 0 & universitycountry != countryname
	drop fill_*

}
foreach gapvar of varlist migrant_delta_gdppw_work {

	replace tau_`ending' = tau_`ending' + `gapvar' * _b["`gapvar'"]

	* Same fill-in, using the absolute log GDP-per-worker gap
	replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["`gapvar'"] if `gapvar' == 0 & universitycountry != countryname

}

* Outcome net of the country effect and tau, used by the school-level step
gen logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation with the baseline country effects (rows flagged new_country)
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation with log GDP per worker of the work country
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* Baseline with N=50 thresh: migrantsTop must reach country_premia_thresh_t50
*--------------------------------

local ending "thresh50"

* Country effects are saved in fe_country
reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work ///
	if valid_country_premia & migrantsTop >= country_premia_thresh_t50 & migrantsTop != ., ///
	absorb(fk_userid fe_country=countryname yearofsalary) ///
	vce(cluster countryname)

* Estimation-sample count and country effect, copied to every row of the country
bysort countryname : egen obs_`ending' = sum(e(sample))
bysort countryname : egen z_c_`ending' = max(fe_country)

* tau accumulates regressor times coefficient for the c_ regressors and the GDP gap
gen tau_`ending' = 0
foreach cvar of varlist c_* {

	* Largest value of this regressor within the coarseDestination group
	bysort coarseDestination : egen fill_`cvar' = max(`cvar')

	replace tau_`ending' = tau_`ending' + `cvar' * _b["`cvar'"]

	* Rows with a zero regressor that work outside the university country
	* receive the group maximum instead
	replace tau_`ending' = tau_`ending' + fill_`cvar' * _b["`cvar'"] if `cvar' == 0 & universitycountry != countryname
	drop fill_*

}
foreach gapvar of varlist migrant_delta_gdppw_work {

	replace tau_`ending' = tau_`ending' + `gapvar' * _b["`gapvar'"]

	* Same fill-in, using the absolute log GDP-per-worker gap
	replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["`gapvar'"] if `gapvar' == 0 & universitycountry != countryname

}

* Outcome net of the country effect and tau, used by the school-level step
gen logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation with the baseline country effects (rows flagged new_country)
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation with log GDP per worker of the work country
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* Include major of study FE: step 1 on rows with a non-empty majorStem
* (no major term enters this regression; the major indicators are added
* in the school-level regression for this specification)
*--------------------------------

local ending "majorFE"

* Count per country of migrant, topDestination rows that report a major
bysort countryname : egen migrantsMajorFE = sum(migrant & topDestination & majorStem != "")

gen valid_country_premia_`ending' = migrantsMajorFE >= country_premia_thresh & migrantsMajorFE != .

* Country effects are saved in fe_country
reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work ///
	if valid_country_premia_`ending' & majorStem != "", ///
	absorb(fk_userid fe_country=countryname yearofsalary) ///
	vce(cluster countryname)

* Estimation-sample count and country effect, copied to every row of the country
bysort countryname : egen obs_`ending' = sum(e(sample))
bysort countryname : egen z_c_`ending' = max(fe_country)

* tau accumulates regressor times coefficient for the c_ regressors and the GDP gap
gen tau_`ending' = 0
foreach cvar of varlist c_* {

	* Largest value of this regressor within the coarseDestination group
	bysort coarseDestination : egen fill_`cvar' = max(`cvar')

	replace tau_`ending' = tau_`ending' + `cvar' * _b["`cvar'"]

	* Rows with a zero regressor that work outside the university country
	* receive the group maximum instead
	replace tau_`ending' = tau_`ending' + fill_`cvar' * _b["`cvar'"] if `cvar' == 0 & universitycountry != countryname
	drop fill_*

}
foreach gapvar of varlist migrant_delta_gdppw_work {

	replace tau_`ending' = tau_`ending' + `gapvar' * _b["`gapvar'"]

	* Same fill-in, using the absolute log GDP-per-worker gap
	replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["`gapvar'"] if `gapvar' == 0 & universitycountry != countryname

}

* Outcome net of the country effect and tau, used by the school-level step
gen logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation with the baseline country effects (rows flagged new_country)
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation with log GDP per worker of the work country
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* At most undergrad: step 1 restricted to rows flagged onlyUndergrad
*--------------------------------

local ending "onlyBach"

* Count per country of migrant, topDestination rows flagged onlyUndergrad
bys countryname : egen migrantsOnlyUndergrad = sum(migrant & topDestination & onlyUndergrad)  

* The missing-value guard must test this section's own counter; it previously
* tested migrantsMajorFE, a leftover from the major-FE section.
generate valid_country_premia_`ending' = migrantsOnlyUndergrad >= country_premia_thresh & migrantsOnlyUndergrad != .

* Country effects are saved in fe_country
reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work if valid_country_premia_`ending' & onlyUndergrad, absorb(fk_userid fe_country=countryname yearofsalary ) vce(cluster countryname)

* Estimation-sample count, copied to every row of the country
bys countryname : egen obs_`ending' =sum(e(sample))

* Country effect, copied to every row of the country
bys countryname : egen z_c_`ending' = max(fe_country)

* tau accumulates regressor times coefficient for the c_ regressors and the GDP gap
generate tau_`ending' = 0
foreach my_var of varlist c_* {
	
	* Largest value of this regressor within the coarseDestination group
	bys coarseDestination : egen fill_`my_var' = max(`my_var')
	replace tau_`ending' = tau_`ending' + `my_var' * _b["`my_var'"] 
	* Rows with a zero regressor that work outside the university country
	* receive the group maximum instead
	replace tau_`ending' = tau_`ending' + fill_`my_var' * _b["`my_var'"] if `my_var' == 0 & universitycountry != countryname 
	drop fill_*
	
}
foreach my_var of varlist migrant_delta_gdppw_work {

	replace tau_`ending' = tau_`ending' + `my_var' * _b["`my_var'"]
	* Same fill-in, using the absolute log GDP-per-worker gap
	replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["`my_var'"] if `my_var' == 0 & universitycountry != countryname 

}

* Outcome net of the country effect and tau, used by the school-level step
generate logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

drop fe_country

* Correlation of z with baseline
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation of z with gdppw
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* Specifications that reuse the baseline step-1 estimates unchanged:
*   separateGrad    - separate undergrad and grad
*   includeAbroad   - include non-domestic workers
*   includeAbroadWt - include non-domestic workers, reweighted
* Only the school-level step differs for these, so the country effect and
* tau are copied from the baseline columns.
*--------------------------------

foreach copy_spec in separateGrad includeAbroad includeAbroadWt {

	local ending "`copy_spec'"

	gen z_c_`ending' = z_c_baseline

	gen tau_`ending' = tau_baseline

	gen logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'

	* Correlation with the baseline country effects (rows flagged new_country)
	pwcorr z_c_`ending' z_c_baseline if new_country
	matrix corrMat = r(C)
	scalar corr_z_`ending' = corrMat[1,2]

	* Correlation with log GDP per worker of the work country
	pwcorr z_c_`ending' log_gdppw_work_country if new_country
	matrix corrMat = r(C)
	scalar corr_gdp_`ending' = corrMat[1,2]

}

*--------------------------------
* Selection adjusted for top and non-top together
*--------------------------------

local ending "selection"

* Subtract select_top when national_rank_pct is at most 0.05 and select_not
* otherwise (a missing national_rank_pct falls in the second group)
gen logbase_`ending' = logbase
replace logbase_`ending' = logbase_`ending' - select_top if national_rank_pct <= 0.05
replace logbase_`ending' = logbase_`ending' - select_not if national_rank_pct > 0.05 | national_rank_pct == .

* Country effects are saved in fe_country
reghdfe logbase_`ending' exp exp_sqrd c_* migrant_delta_gdppw_work ///
	if valid_country_premia, ///
	absorb(fk_userid fe_country=countryname yearofsalary) ///
	vce(cluster countryname)

* Estimation-sample count and country effect, copied to every row of the country
bysort countryname : egen obs_`ending' = sum(e(sample))
bysort countryname : egen z_c_`ending' = max(fe_country)

* tau accumulates regressor times coefficient for the c_ regressors and the GDP gap
gen tau_`ending' = 0
foreach cvar of varlist c_* {

	* Largest value of this regressor within the coarseDestination group
	bysort coarseDestination : egen fill_`cvar' = max(`cvar')

	replace tau_`ending' = tau_`ending' + `cvar' * _b["`cvar'"]

	* Rows with a zero regressor that work outside the university country
	* receive the group maximum instead
	replace tau_`ending' = tau_`ending' + fill_`cvar' * _b["`cvar'"] if `cvar' == 0 & universitycountry != countryname
	drop fill_*

}
foreach gapvar of varlist migrant_delta_gdppw_work {

	replace tau_`ending' = tau_`ending' + `gapvar' * _b["`gapvar'"]

	* Same fill-in, using the absolute log GDP-per-worker gap
	replace tau_`ending' = tau_`ending' + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["`gapvar'"] if `gapvar' == 0 & universitycountry != countryname

}

* Outcome net of the country effect, tau and the selection terms.
* fe_country is not dropped in this section.
gen logbase_z_c_`ending' = logbase - z_c_`ending' - tau_`ending'
replace logbase_z_c_`ending' = logbase_z_c_`ending' - select_top if national_rank_pct <= 0.05
replace logbase_z_c_`ending' = logbase_z_c_`ending' - select_not if national_rank_pct > 0.05 | national_rank_pct == .

* Correlation with the baseline country effects (rows flagged new_country)
pwcorr z_c_`ending' z_c_baseline if new_country
matrix corrMat = r(C)
scalar corr_z_`ending' = corrMat[1,2]

* Correlation with log GDP per worker of the work country
pwcorr z_c_`ending' log_gdppw_work_country if new_country
matrix corrMat = r(C)
scalar corr_gdp_`ending' = corrMat[1,2]

*--------------------------------
* Merge in country estimates stored in the estimates folder:
*   altTrunc      - alternative truncation results
*   imputeMissing - impute missing degree results
* The file path uses a forward slash, like every other path built from the
* folder locals: it works on all platforms and avoids placing a backslash
* next to a macro reference.
*--------------------------------

foreach ext_spec in altTrunc imputeMissing {

	local ending "`ext_spec'"

	* Keep master rows only (matched or not); no _merge variable is left behind
	merge m:1 countryname using "`estimatePath'/Estimates_z_c_`ending'.dta", keep(master match) nogenerate

	* Correlation with the baseline country effects (rows flagged new_country)
	pwcorr z_c_`ending' z_c_baseline if new_country
	matrix corrMat = r(C)
	scalar corr_z_`ending' = corrMat[1,2]

	* Correlation with log GDP per worker of the work country
	pwcorr z_c_`ending' log_gdppw_work_country if new_country
	matrix corrMat = r(C)
	scalar corr_gdp_`ending' = corrMat[1,2]

}

********************************************************
* STEP 1B: KEEP ONLY EDUCATION SAMPLE NOW THAT MOVERS HAVE BEEN USED
********************************************************

* 1 when the row carries a baseline country effect
gen hasCountryPremia = z_c_baseline != .

* From here on only rows with valid_educ, a country effect and a school name remain
keep if valid_educ & hasCountryPremia & school != ""

********************************************************
* Determine step 2 sample thresholds
* Each group below counts rows per school (all, and home_country only) and
* marks the school valid when the home_country count reaches the threshold.
********************************************************

bysort school : egen schoolObs = sum(hasCountryPremia & valid_educ)
bysort school : egen schoolObsLocal = sum(hasCountryPremia & valid_educ & home_country)
gen valid_school = schoolObsLocal >= school_thresh

********************************************************
* N=50 cutoff
********************************************************

gen hasCountryPremia_t50 = z_c_thresh50 != .

bysort school : egen schoolObs_t50 = sum(hasCountryPremia_t50 & valid_educ)
bysort school : egen schoolObsLocal_t50 = sum(hasCountryPremia_t50 & valid_educ & home_country)
gen valid_school_t50 = schoolObsLocal_t50 >= school_thresh_t50

********************************************************
* Drop first migration obs
********************************************************

gen hasCountryPremiaLongTime = z_c_longTime != . & valid_country_premia_long

bysort school : egen schoolObs_long = sum(hasCountryPremiaLongTime & valid_educ)
bysort school : egen schoolObsLocal_long = sum(hasCountryPremiaLongTime & valid_educ & home_country)
gen valid_school_long = schoolObsLocal_long >= school_thresh

********************************************************
* Excluding US
********************************************************

gen hasCountryPremiaExcludeUS = z_c_excludeUS != .

bysort school : egen schoolObs_excUS = sum(hasCountryPremiaExcludeUS & valid_educ)
bysort school : egen schoolObsLocal_excUS = sum(hasCountryPremiaExcludeUS & valid_educ & home_country)
gen valid_school_excUS = schoolObsLocal_excUS >= school_thresh

********************************************************
* Selection sample
********************************************************

gen hasCountryPremiaSelection = z_c_baseline != . & selectionCountry

bysort school : egen schoolObs_selec = sum(hasCountryPremiaSelection & valid_educ)
bysort school : egen schoolObsLocal_selec = sum(hasCountryPremiaSelection & valid_educ & home_country)
gen valid_school_selec = schoolObsLocal_selec >= school_thresh

********************************************************
* At most undergrad sample
********************************************************

gen hasCountryPremiaOnlyU = z_c_onlyBach != .

bysort school : egen schoolObs_onlyu = sum(hasCountryPremiaOnlyU & valid_educ & onlyUndergrad)
bysort school : egen schoolObsLocal_onlyu = sum(hasCountryPremiaOnlyU & valid_educ & home_country & onlyUndergrad)
gen valid_school_onlyu = schoolObsLocal_onlyu >= school_thresh

********************************************************
* Has major
********************************************************

gen hasCountryPremiaMajor = z_c_majorFE != .

bysort school : egen schoolObs_major = sum(hasCountryPremiaMajor & valid_educ & majorStem != "")
bysort school : egen schoolObsLocal_major = sum(hasCountryPremiaMajor & valid_educ & home_country & majorStem != "")
gen valid_school_major = schoolObsLocal_major >= school_thresh

********************************************************
* ESTIMATE SCHOOL FE 
********************************************************

*--------------------------------
* Baseline
* Pattern used by every school-level section: regress the adjusted outcome
* on experience, save the school effect in fe_school, then store per school
* the estimation-sample count and the effect, and drop fe_school.
*--------------------------------

local ending "baseline"

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school & home_country & universitycountry != "", ///
	absorb(fe_school=school yearofsalary)

bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* Exclude first migration observation
* (rows with a non-missing migrate_years below 4 are left out)
*--------------------------------

local ending "longTime"

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school_long & home_country & universitycountry != "" & !(migrate_years < 4 & migrate_years != .), ///
	absorb(fe_school=school yearofsalary)

bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* Excluding Founders and C-suite
*--------------------------------

local ending "excFounderCSuite"

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school & home_country & universitycountry != "" & !founder & !csuite, ///
	absorb(fe_school=school yearofsalary)

bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* With FE for obs number (obsNum is absorbed as well)
*--------------------------------

local ending "obsNumFE"

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school & home_country & universitycountry != "", ///
	absorb(fe_school=school yearofsalary obsNum)

bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* Exclude wage-pairs for Indian migrants to the U.S.
* (U.S. rows preceded by India and India rows followed by the U.S.)
*--------------------------------

local ending "excludeIndiaToUS"

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school & home_country & universitycountry != "" & !(countryname == "United States" & prev_country== "India") & !(countryname == "India" & next_country== "United States"), ///
	absorb(fe_school=school yearofsalary)

bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* Project onto GDP
*--------------------------------

local ending "projectGDP"

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school & home_country & universitycountry != "", ///
	absorb(fe_school=school yearofsalary)

bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* Incorporate VP (outcome built from logtotal; out-of-band pay excluded)
*--------------------------------

local ending "addVP"

reghdfe logtotal_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school & home_country & universitycountry != "" & !outside_thresh_total, ///
	absorb(fe_school=school yearofsalary)

bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* Selection unadjusted: baseline outcome, schools valid in the selection sample
*--------------------------------

local ending "selectionUnadj"

gen logbase_z_c_`ending' = logbase_z_c_baseline

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school_selec & home_country & universitycountry != "", ///
	absorb(fe_school=school yearofsalary)

bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* Selection adjusted
*--------------------------------

local ending "selection"

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school_selec & home_country & universitycountry != "", ///
	absorb(fe_school=school yearofsalary)

bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* Plot countries with/without selection (Figures B6a and B6b)
*--------------------------------

preserve

	* Country-level flag: 1 if any row of the university country is a selection country
	bysort universitycountry : egen selection_country = max(selectionCountry)

	* One row per school with a baseline school effect
	keep if q_j_baseline != .
	bysort universitycountry school : keep if _n == 1

	* Attach log GDP per worker of the university country; matched rows only
	gen country_glassdoor = universitycountry
	merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta", keep(match) nogenerate

	* Marks one row per university country for the country-level regression and plot
	bysort universitycountry : gen newUniversityCountry = _n == 1

	* Panel a uses the unadjusted school effects, panel b the selection-adjusted ones
	local spec_list "selectionUnadj selection"
	local panel_list "a b"

	forvalues k = 1/2 {

		local ending : word `k' of `spec_list'
		local panel : word `k' of `panel_list'

		*--------------------------------------------------
		* Average school effect among top-ranked schools in each country
		*--------------------------------------------------

		foreach topUniversityThresh of numlist 5 {

			disp `topUniversityThresh'

			gen pct_thresh = `topUniversityThresh' / 100

			* Mean over schools within the top percentile, then copied to all rows of the country
			bysort universitycountry : egen avg_q_j_`topUniversityThresh' = mean(q_j_`ending') if national_rank_pct <= pct_thresh & national_rank != .
			bysort universitycountry : egen avg_`topUniversityThresh' = max(avg_q_j_`topUniversityThresh')

			drop pct_thresh
		}

		* Slope of the country average on log GDP per worker
		reg avg_5 log_gdppw_uc if newUniversityCountry & selection_country
		scalar coef_`ending' = _b["log_gdppw_uc"]

		* Fitted line with confidence band plus labelled country markers
		twoway lfitci avg_5 log_gdppw_uc if newUniversityCountry & selection_country, clcolor(gs2) || ///
		scatter avg_5 log_gdppw_uc if newUniversityCountry & selection_country, mlabel(iso) ms(i)  mlabcolor(black) legend(off) mlabsize(6.5pt) xscale(range(9.375 12.075)) ///
			yscale(r(-0.6(0.2)0.6)) ylabel(-0.6(0.2)0.6)   ///
			xtitle("GDP per worker, 2019 intl $") xlabel(9.68 "16000" 10.373 "32000" 11.067 "64000" 11.760 "128000") ytitle("college graduate quality")
		graph export "`figurePath'/Figure_B6`panel'.eps" , replace

		drop avg_q_j_* avg_*

	}

restore

*--------------------------------
* Only non-U.S. earnings
*--------------------------------

local ending "excludeUS"

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if hasCountryPremiaExcludeUS & valid_educ & valid_school_excUS & home_country & universitycountry != "" & countryname != "United States", ///
	absorb(fe_school=school yearofsalary)

* Per school: estimation-sample count and school effect
bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* Add firm FE (fk_employerid is absorbed as well)
*--------------------------------

local ending "firm"

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school & home_country & universitycountry != "", ///
	absorb(fe_school=school fk_employerid yearofsalary)

* Per school: estimation-sample count and school effect
bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* Add metro area FE and tabulate metros per country
*--------------------------------

local ending "metro"

* Rows without a metro are excluded; metro is absorbed next to the school effect
reghdfe logbase_z_c_`ending' exp exp_sqrd if valid_educ & valid_school & home_country & universitycountry != "" & metro != "", absorb(fe_school=school metro yearofsalary)

* Per school: estimation-sample count and school effect
bys school : egen obs_school_`ending' =sum(e(sample))

bys school : egen q_j_`ending' = max(fe_school)

drop fe_school

* Metro in each country

* NOTE(review): these repeat the path locals set at the top of the file;
* presumably kept so this part can be run on its own.
local seed "C:\Users\jsock\Dropbox\Research\GD\International"

local dataPath "`seed'/Data"
local inputPath "`seed'/InputData"
local figurePath "`seed'/Replication/Figures"
local tablePath "`seed'/Replication/Tables"
local estimatePath "`seed'/Replication/Estimates"
local tempPath "`seed'/Replication/TempData"

preserve

	drop if metro == ""
	
	* e(sample) still refers to the metro regression above
	bys countryname metro: egen inSample = max(e(sample))

	* Keep metros with at least one row in the estimation sample
	keep if inSample

	* One row per country-metro pair, then count metros per country
	bys countryname metro: keep if _n == 1
	
	generate country_metros = 1
	
	collapse (sum) country_metros, by(countryname)

	sort countryname
	
	* Forward slash, consistent with the other paths built from the folder locals
	outsheet using "`estimatePath'/Total_metros_each_country.csv" , comma replace

restore

*--------------------------------
* Add job title FE (jobtitle is absorbed as well)
*--------------------------------

local ending "job"

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school & home_country & universitycountry != "", ///
	absorb(fe_school=school jobtitle yearofsalary)

* Per school: estimation-sample count and school effect
bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* Include non-domestic workers: no home_country restriction in the sample
*--------------------------------

local ending "includeAbroad"

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school & universitycountry != "", ///
	absorb(fe_school=school yearofsalary)

* Per school: estimation-sample count and school effect
bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

* Mean of alsoGrad over rows meeting the same conditions as the regression above
summarize alsoGrad if valid_educ & valid_school & universitycountry != ""
scalar share_foreign_alsograd = r(mean)

*--------------------------------
* Include non-domestic workers and reweight by migrant flows
*-------------------------------- 

local ending "includeAbroadWt"

	*-----------------------------
	* Read in graduate remain in country share from DIOC
	*-----------------------------
	
	preserve
	
		clear
		
		insheet using "`inputPath'/File-2-DIOC-E-2010-11-Labour-Force-Status.csv" , comma
		
		* Keep skilled migrants (with tertiary education)
		keep if education == 3

		collapse (sum) number , by(coub country)

		* Share of each coub total that is recorded in the same country
		* NOTE(review): coub is treated here as the country of study - confirm
		bys coub: egen total_number = sum(number)
		
		generate share_graduates_stay = number / total_number
		
		keep if coub == country
		
		keep coub share_graduates_stay
		
		rename coub university_country_iso

		tempfile temp_immigrants
		save `temp_immigrants', emptyok 
		
	restore
	
	* Attach the stay share. Using-only rows are dropped, as in the other
	* merges of this file, so no empty country-only rows are appended.
	merge m:1 university_country_iso using `temp_immigrants'
	drop if _merge == 2
	drop _merge
	
	*-----------------------------
	* Obtain regression samples for reweighting
	*-----------------------------
	
	* Unweighted run, used only to identify the estimation sample
	reghdfe logbase_z_c_`ending' exp exp_sqrd if valid_educ & valid_school & universitycountry != "" & share_graduates_stay != . , absorb(school yearofsalary)
	gen wtSample = e(sample)
	
	* In-sample row counts per university country, at home and abroad
	bys universitycountry : egen domesticN = sum(wtSample * (universitycountry == countryname))
	bys universitycountry : egen abroadN= sum(wtSample * (universitycountry != countryname))
	
	* Domestic rows of a country share the stay share in total weight,
	* rows abroad share the remainder
	generate wt = (1 / domesticN) * share_graduates_stay if universitycountry == countryname
	replace wt = (1 / abroadN) * (1 - share_graduates_stay) if universitycountry != countryname
	
	drop domesticN abroadN wtSample
	
	* Weighted run that saves the school effect
	reghdfe logbase_z_c_`ending' exp exp_sqrd if valid_educ & valid_school & universitycountry != "" & share_graduates_stay != . [aw=wt], absorb(fe_school=school yearofsalary)
		
	* Per school: estimation-sample count and school effect
	bys school : egen obs_school_`ending' =sum(e(sample))

	bys school : egen q_j_`ending' = max(fe_school)

	drop fe_school

*--------------------------------
* N=50 threshold (schools valid under the N=50 cutoff)
*--------------------------------

local ending "thresh50"

reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school_t50 & home_country & universitycountry != "", ///
	absorb(fe_school=school yearofsalary)

* Per school: estimation-sample count and school effect
bysort school : egen obs_school_`ending' = sum(e(sample))
bysort school : egen q_j_`ending' = max(fe_school)
drop fe_school

*--------------------------------
* Major FE
* School-level regression with major indicators; the coefficients on the
* indicators are written to Table_B2.tex.
*--------------------------------

local ending "majorFE"

* One indicator per grpmajor category among rows with a non-empty majorStem.
* m_3 is dropped, so it is the omitted category in the regression below.
tab grpmajor if majorStem != "", gen(m_)
drop m_3

* Start from an empty set of stored estimates
eststo clear

* Store the regression so esttab can export it; school effect saved in fe_school
eststo: reghdfe logbase_z_c_`ending' exp exp_sqrd m_* if valid_educ & valid_school_major & home_country & universitycountry != "" & majorStem != "", absorb(fe_school=school yearofsalary) vce(cluster school)

	* Row labels for the table.
	* NOTE(review): the numbering assumes the category order produced by tab above - confirm
	label var m_1 "Major: Arts and Humanities"
	label var m_2 "Major: Biological Sciences"
	label var m_4 "Major: Communication"
	label var m_5 "Major: Education"
	label var m_6 "Major: Engineering"
	label var m_7 "Major: Health Services"
	label var m_8 "Major: Physical Sciences"
	label var m_9 "Major: Social Sciences"
	label var m_10 "Major: Social Services"
	label var m_11 "Major: Technology"

	* Store information for plotting
	local texFile = "`tablePath'/Table_B2.tex"

	* Statements are terminated by a semicolon until the delimiter is reset below
	#delimit ; 
	esttab using "`texFile'", b(%6.3f) se(%5.3f) label compress replace obslast depvars nocons nomtitles
		legend star(* 0.10 ** 0.05 *** 0.01) eqlabels(none) 
		stats(N r2, 
						fmt(%9.0g %3.2f) 
						labels("Observations" 
							   "R$^2$")) 
		drop(_cons exp exp_sqrd)
		prehead(
			\begin{tabular}{l*{@M}{c}} \hline \hline \noalign{\smallskip}
		)
		posthead(\hline \noalign{\smallskip})
		prefoot(\hline \noalign{\smallskip} )
		postfoot(
			\hline \hline 
			\end{tabular}
		)
	; 
	#delimit cr

* Per school: estimation-sample count, taken from e(sample) after the export
bys school : egen obs_school_`ending' =sum(e(sample))

* Per school: school effect
bys school : egen q_j_`ending' = max(fe_school)

drop fe_school

*--------------------------------
* Robustness: at most an undergraduate degree
*--------------------------------

local ending "onlyBach"

* School-premium regression restricted to rows flagged onlyUndergrad, using the
* valid_school_onlyu inclusion flag; the school effect is stored in fe_school.
reghdfe logbase_z_c_`ending' exp exp_sqrd ///
	if valid_educ & valid_school_onlyu & home_country & universitycountry != "" & onlyUndergrad, ///
	absorb(fe_school=school yearofsalary)

* Estimation-sample observations per school
bysort school: egen obs_school_`ending' = total(e(sample))

* Copy the school effect onto every row of the school
bysort school: egen q_j_`ending' = max(fe_school)

drop fe_school

*--------------------------------
* Separate undergrad and grad
*--------------------------------

local ending "separateGrad"

	*--------------------------------------------------
	* Add number of universities for graduate countries
	*--------------------------------------------------

	* Read the country-level university counts into a tempfile, renaming the
	* columns with a _2 suffix so they attach to the graduate institution.
	preserve

		clear
		
		* Forward slash, consistent with the other inputPath/dataPath reads in
		* this file; it also works on non-Windows Stata, unlike a backslash.
		insheet using "`inputPath'/Country_university_total.csv" , comma
		
		rename country universitycountry_2
		rename numberofuniversities numberuniversities_2
			
		tempfile in_results

		save `in_results', emptyok
		
	restore

	* Keep every master row; counts that match no graduate country are discarded
	merge m:1 universitycountry_2 using `in_results'
	drop if _merge == 2
	drop _merge

	* World-rank bin of the graduate institution. The letter prefix fixes the
	* category order used later by tab, gen(). Rows with a rank above 2000, or
	* with no graduate university country, keep an empty bin.
	generate rankBin2 = ""
	replace rankBin2 = "A_01_20" if inrange(world_rank_2,1,20)
	replace rankBin2 = "B_21_50" if inrange(world_rank_2,21,50)
	replace rankBin2 = "C_51_100" if inrange(world_rank_2,51,100)
	replace rankBin2 = "D_101_250" if inrange(world_rank_2,101,250)
	replace rankBin2 = "E_251_500" if inrange(world_rank_2,251,500)
	replace rankBin2 = "F_501_1000" if inrange(world_rank_2,501,1000)
	replace rankBin2 = "G_1001_2000" if inrange(world_rank_2,1001,2000)
	replace rankBin2 = "H_UNRANKED" if world_rank_2 == . & universitycountry_2 != ""
		
	* National rank of the graduate institution as a share of the number of
	* universities in its country
	generate national_rank_pct_2 = national_rank_2 / numberuniversities_2

	*------------------------
	* Grad school sample sizes used for inclusion
	*------------------------
	
	* A usable graduate record: the row is flagged alsoGrad, names a graduate
	* school and its country, is not flagged foreign_country_2, and already
	* passes valid_educ.
	generate valid_educ_2 = alsoGrad & school_2 != "" & universitycountry_2 != "" ///
		& !foreign_country_2 & valid_educ
	
	* Usable graduate observations with a country premium, per graduate school
	bysort school_2: egen schoolObsSelect_2 = total(hasCountryPremia & valid_educ_2)

	********************************************************
	* Step 2 sample thresholds 
	********************************************************

	* Same count as schoolObsSelect_2, stored under the step-2 name
	bysort school_2: egen schoolObs_2 = total(hasCountryPremia & valid_educ_2)

	* As above, but only rows where the graduate university country equals countryname
	bysort school_2: egen schoolObsLocal_2 = total(hasCountryPremia & valid_educ_2 & universitycountry_2 == countryname)

	* A graduate school qualifies when its local count reaches the scalar school_thresh
	generate valid_school_2 = schoolObsLocal_2 >= school_thresh 
	
	*------------------------
	* Numeric identifier for the undergraduate school
	*------------------------
	
	* Number the rows inside each school, rank the first rows alphabetically by
	* school name, then spread that rank to every row of the school.
	tempvar withinSchool firstRowRank

	bysort school: generate `withinSchool' = _n
	bysort `withinSchool' (school): generate `firstRowRank' = _n if `withinSchool' == 1
	bysort school: egen school_id = max(`firstRowRank')
	
	drop `firstRowRank' `withinSchool'
	
	*------------------------
	* Numeric identifier for the graduate school
	*------------------------
	
	* Same construction on school_2. An empty school_2 forms its own group and
	* therefore also receives an identifier.
	bysort school_2: generate `withinSchool' = _n
	bysort `withinSchool' (school_2): generate `firstRowRank' = _n if `withinSchool' == 1
	bysort school_2: egen school2_id = max(`firstRowRank')
	
	drop `firstRowRank' `withinSchool'

	*------------------------
	* Estimate q_j_undergrad and q_j_grad 
	*------------------------

	* Rows without a graduate school always pass the graduate-school filter,
	* while unmatched graduate degrees never do (second rule wins if both apply).
	replace valid_school_2 = 1 if school_2 == ""

	replace valid_school_2 = 0 if degree_2 == "UNMATCHED"
		
	* Two-way school effects: undergraduate (school_id) and graduate (school2_id),
	* plus year-of-salary effects. Effects are saved as fe_school / fe_school_2.
	reghdfe logbase_z_c_`ending' exp exp_sqrd if (onlyUndergrad | alsoGrad) & valid_educ & valid_school & home_country & valid_school_2 & universitycountry != "" & ~foreign_country_2, absorb(fe_school=school_id fe_school_2=school2_id yearofsalary)

	* Estimation-sample indicator, kept as a variable for the counts below
	generate sample_`ending' = e(sample) 

	* Undergraduate side: observations per school and the school effect on every row
	bys school : egen obs_school_`ending' = sum(sample_`ending')

	bys school : egen q_j_`ending' = max(fe_school)

	*------------------------
	* Fill in throughout the sample
	*------------------------

	bys school : egen obs_u_`ending' = sum(sample_`ending')
	bys school_2 : egen obs_g_`ending' = sum(sample_`ending')

	* Graduate side: the graduate-school effect on every row of the graduate school
	bys school_2 : egen q_j_g_`ending' = max(fe_school_2)

	* Drop both saved effects exactly once. The earlier version dropped fe_school
	* right after building q_j and then again here, which stops the do-file with
	* "variable fe_school not found".
	drop fe_school fe_school_2 
	
	*------------------------
	* Export school premium estimates
	*------------------------

	* Undergraduate premia: one row per school that has an estimate
	preserve
		
		drop if q_j_`ending' == .
		
		bysort school: keep if _n == 1
		
		keep school university_country_iso world_rank q_j_`ending' obs_u_`ending'

		rename school merge_school
		rename q_j_`ending' merge_q_j_`ending'

		* Order rows from the highest to the lowest premium
		generate descKey = -1 * merge_q_j_`ending'
		sort descKey
		drop descKey
			
		* Express every premium relative to The University of Texas at Austin
		foreach premium of varlist merge_q_j_* {
			
			generate refValue = `premium' if merge_school == "The University of Texas at Austin"
			egen refValueAll = max(refValue)
			
			replace `premium' = `premium' - refValueAll
			
			drop refValue refValueAll
		}

		outsheet using "`estimatePath'/Q_j_`ending'.csv" , comma replace

		* The .dta version keeps only the merge key and the premium
		keep merge_school merge_q_j_`ending' 

		save "`estimatePath'/Q_j_`ending'.dta" , replace
	
	restore
	
	* Graduate premia: one row per graduate school that has an estimate
	preserve
		
		drop if q_j_g_`ending' == .
		
		bysort school_2: keep if _n == 1
		
		keep school_2 university_country_iso_2 world_rank_2 q_j_g_`ending' obs_g_`ending'

		rename school_2 merge_school
		rename q_j_g_`ending' merge_q_j_g_`ending'

		* Order rows from the highest to the lowest premium
		generate descKey = -1 * merge_q_j_g_`ending'
		sort descKey
		drop descKey
			
		* Express every premium relative to the row whose school name is empty.
		* NOTE(review): presumably this is the "no graduate school" group that the
		* estimation step lets into the sample -- confirm this reference is intended
		* rather than a named university.
		foreach premium of varlist merge_q_j_* {
			
			generate refValue = `premium' if merge_school == ""
			egen refValueAll = max(refValue)
			
			replace `premium' = `premium' - refValueAll
			
			drop refValue refValueAll
		}		

		outsheet using "`estimatePath'/Q_j_g_`ending'.csv" , comma replace
		
		* The .dta version keeps only the merge key and the premium
		keep merge_school merge_q_j_g_`ending' 

		save "`estimatePath'/Q_j_g_`ending'.dta" , replace
	
	restore
		
	*------------------------
	* Estimate relation between q_j_graduate and CWUR world ranking
	*------------------------

	preserve
		
		* Flag one row per undergraduate school and one row per graduate school
		bys school: generate newUniversity = _n == 1
		bys school_2: generate newUniversity2 = _n == 1
		
		eststo clear

		* Column 1: undergraduate premia on world-rank bin indicators, weighted by
		* the number of observations behind each premium. r_8 is left out as the
		* reference bin (assumes rankBin has eight categories).
		tab rankBin, gen(r_)
		
		eststo: reg q_j_`ending' r_1-r_7 if newUniversity [aw=obs_u_`ending']

			* Number of schools per bin in the estimation sample. count is used
			* instead of tab with matcell, whose second row is absent (missing or
			* wrong) whenever an indicator is constant in the sample.
			foreach myNum of numlist 1/8{
				count if r_`myNum' == 1 & e(sample)
				local nextTotal = r(N)
				estadd loc obs_`myNum' "`nextTotal'", replace 			
				disp "`nextTotal'"
			}
				
		drop r_*

		* Column 2: graduate premia. Rows without a graduate school get their own
		* bin, which sorts last and is left out as the reference.
		replace rankBin2 = "I_NONE" if school_2 == ""
		tab rankBin2, gen(r_)
		
		eststo: reg q_j_g_`ending' r_1-r_8 if newUniversity2 [aw=obs_g_`ending']

			foreach myNum of numlist 1/8{
				count if r_`myNum' == 1 & e(sample)
				local nextTotal = r(N)
				estadd loc obs_`myNum' "`nextTotal'", replace 			
				disp "`nextTotal'"
			}
		
		* Row labels used by esttab (label option)
		label var r_1 "World rank: 1--20"
		label var r_2 "World rank: 21--50"
		label var r_3 "World rank: 51--100"
		label var r_4 "World rank: 101--250"
		label var r_5 "World rank: 251--500"
		label var r_6 "World rank: 501--1000"
		label var r_7 "World rank: 1001--2000"
		label var r_8 "World rank: Unranked"

		* Write the vertical Table 10: one column per model (undergraduate, graduate).
		* The header now has one cell per model; the previous header listed eight
		* rank-bin columns, which does not fit the two-model tabular.
		local texFile = "`tablePath'/Table_10_vertical.tex"

		#delimit ; 
		esttab using "`texFile'", b(%6.3f) se(%5.3f) label compress replace obslast depvars nocons nomtitles
			legend star(* 0.10 ** 0.05 *** 0.01) eqlabels(none) nonum
			stats(obs_1 obs_2 obs_3 obs_4 obs_5 obs_6 obs_7 obs_8 N r2_a, 
							fmt(%9.0g %9.0g %9.0g %9.0g %9.0g %9.0g %9.0g %9.0g %9.0g %3.2f) 
							labels("N: Rank 1--20" 
								   "N: Rank 21--50" 
								   "N: Rank 51--100" 
								   "N: Rank 101--250" 
								   "N: Rank 251--500" 
								   "N: Rank 501--1000" 
								   "N: Rank 1001--2000" 
								   "N: Rank Unranked" 
								   "N: Total" 
								   "Adjusted R$^2$")) 
			drop(_cons )
			prehead(
				\begin{tabular}{l*{@M}{c}} \hline \hline \\
						& \multicolumn{1}{c}{\shortstack{Undergraduate}} 
						& \multicolumn{1}{c}{\shortstack{Graduate}} \\
					\cmidrule(l){2-2} \cmidrule(l){3-3} 
			)
			posthead(\hline \\)
			prefoot(\\ \hline \noalign{\smallskip} )
			postfoot(
				\hline \hline \noalign{\smallskip}
				\end{tabular}
			)
		; 
		#delimit cr

		drop r_*
		
	restore
	
*--------------------------------
* Merge in alternative truncation results
*--------------------------------

local ending "altTrunc"

* School-level estimates saved elsewhere under this ending; schools that appear
* only in the estimates file are discarded. Forward slashes keep the path
* consistent with the other estimatePath/tempPath uses in this file and valid
* on non-Windows Stata.
merge m:1 school using "`estimatePath'/Estimates_q_j_`ending'.dta"
	
drop if _merge == 2
drop _merge

*--------------------------------
* Merge in imputation for missing degree results
*--------------------------------

local ending "imputeMissing"

merge m:1 school using "`estimatePath'/Estimates_q_j_`ending'.dta"
drop if _merge == 2
drop _merge

********************************************************
* Relation between q_j for top universities and gdppw
********************************************************

*--------------------------------
* Express each q_j relative to one reference university
* (excludeUS is not in this list)
*--------------------------------

local endings "baseline firm job includeAbroad thresh50 onlyBach separateGrad selection addVP projectGDP altTrunc majorFE obsNumFE includeAbroadWt excFounderCSuite longTime imputeMissing metro excludeIndiaToUS"

foreach my_ending of local endings {

	* Reference value: the estimate on the rows of The University of Texas at
	* Austin, held in a helper variable of default storage type.
	generate utaustin = q_j_`my_ending' if school == "The University of Texas at Austin"

	* r(max) is missing when the reference has no estimate, in which case the
	* whole q_j variable becomes missing.
	summarize utaustin, meanonly
	replace q_j_`my_ending' = q_j_`my_ending' - r(max)

	drop utaustin
}

*--------------------------------
* Run cross country regressions
*--------------------------------

local endings "baseline firm job includeAbroad thresh50 onlyBach separateGrad selection addVP projectGDP altTrunc majorFE obsNumFE includeAbroadWt excFounderCSuite longTime imputeMissing excludeUS metro excludeIndiaToUS" 

* For every specification: regress the country average of q_j on log GDP per
* worker, separately for top and not-top schools, and write Table 9.
foreach my_ending of local endings{

	preserve
		
		eststo clear
		
		* One row per school that has an estimate under this specification
		keep if q_j_`my_ending' != .
		
		bys universitycountry school: keep if _n == 1

		* One flagged row per university country for the country-level regressions
		bys universitycountry : generate newUniversityCountry = _n == 1
	
		* Attach log GDP per worker of the university country
		generate country_glassdoor = universitycountry
		
		merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta"
		drop if _merge == 2
		
		* TOP 5% AVG Q_J	

		foreach topUniversityThresh of numlist 5 {
			
			disp `topUniversityThresh'
		
			generate pct_thresh = `topUniversityThresh' / 100
			
			* Top-school indicator, stored so the not-top group below can be its
			* exact complement
			generate byte isTop_`topUniversityThresh' = national_rank_pct <= pct_thresh & national_rank != .
			
			bys universitycountry : egen avg_q_j_`topUniversityThresh' = mean(q_j_`my_ending') if isTop_`topUniversityThresh'

			* Spread the country average to every row of the country
			bys universitycountry : egen avg_pct_`topUniversityThresh' = max(avg_q_j_`topUniversityThresh')
			
			drop pct_thresh
		} 
								
		eststo: reg avg_pct_5 log_gdppw_uc if newUniversityCountry 
			estadd loc wtd "", replace 			
		drop avg_q_j_5 
		
		* NOT TOP 5% AVG Q_J
		* Uses the complement of the top indicator. The earlier condition compared
		* national_rank_pct with the double literal 0.05 while the top group used
		* a float threshold variable, so if national_rank_pct is stored as float a
		* school at exactly 5 percent was counted in both groups.

		bys universitycountry : egen avg_q_j_low = mean(q_j_`my_ending') if !isTop_5
		
		bys universitycountry : egen avg_low = max(avg_q_j_low)
		
		eststo: reg avg_low log_gdppw_uc if newUniversityCountry 
			estadd loc wtd "", replace 			
		drop avg_q_j_low 
			
		twoway scatter avg_low log_gdppw_uc 
		label var log_gdppw_uc "Log(gdppw)"
				
		* ADD SOME ROBUSTNESS STATISTICS
		
		* Correlation of z with baseline (scalar computed outside this section)
		local corrVal : di %4.2f corr_z_`my_ending'
		estadd loc corr_z "`corrVal'", replace 			
		
		* Correlation of z with gdppw (scalar computed outside this section)
		local corrVal : di %4.2f corr_gdp_`my_ending'
		estadd loc corr_gdp "`corrVal'", replace 	
		
		* Correlation of q with baseline, across the schools kept above
		pwcorr q_j_`my_ending' q_j_baseline
		matrix corrMat = r(C)
		local corrVal : di %4.2f corrMat[1,2]
		estadd loc corr_q "`corrVal'", replace 			
		
		* Write Table 9 for this specification
		local texFile = "`tablePath'/Table_9_`my_ending'.tex"

		#delimit ; 
		esttab using "`texFile'", b(%6.3f) se(%5.3f) label compress replace obslast depvars nocons nomtitles
			legend star(* 0.10 ** 0.05 *** 0.01) eqlabels(none) nonum
			stats(corr_z corr_q corr_gdp N r2_a, 
							fmt(%3.2f %3.2f %3.2f %9.0g %3.2f) 
							labels("Baseline correlation with z" 
								   "Baseline correlation with q" 
								   "GDPPW correlation with z" 
								   "N" 
								   "Adjusted R$^2$")) 
			drop(_cons )
			prehead(
				\begin{tabular}{l*{@M}{c}} \hline \hline \\
						& \multicolumn{1}{c}{Top Colleges} 
						& \multicolumn{1}{c}{Not-Top Colleges} \\
					\cmidrule(l){2-2} \cmidrule(l){3-3} 
			)
			posthead(\\)
			prefoot(\\ \hline \noalign{\smallskip} )
			postfoot(
				\hline \hline \noalign{\smallskip}
				\end{tabular}
			)
		; 
		#delimit cr

	restore

}
